# Author: Benedetta Carlotti 
# Corresponding email: benedetta.carlotti@sns.it
# Manuscript: The odd couple: analyzing United Kingdom Independence Party (UKIP) and Italian Five Stars Movement’s (FSM’s) European Union (EU)-opposition in the European Parliament (EP)
# Replication material (script) for TABLE 2 of the manuscript
# Please note: this script only computes the average length (in tokens) of the .txt files in each corpus and the standard deviation (SD) of that length

# Required packages 
require(quanteda)
require(readtext)

# DESCRIPTIVE STATISTICS CORPUS UKIP ECONOMIC POLICY
# Read the text files from the given location (replace the placeholder with
# the directory holding the UKIP economic-policy texts).
file_txt_UKIP_ECO <- readtext("insert corresponding directory here")
# Build the corpus; the original analysis documents 419 .txt files here.
corpus_UKIP_ECO <- corpus(file_txt_UKIP_ECO)
# Per-document summary of the corpus. n is taken from the corpus itself
# instead of being hard-coded, so no document is silently left out when the
# number of files differs from the expected count.
info_corpus_UKIP_ECO <- summary(corpus_UKIP_ECO, n = ndoc(corpus_UKIP_ECO))
# Average text length in tokens (a single value repeated on every row).
info_corpus_UKIP_ECO$average_tokens <- mean(info_corpus_UKIP_ECO$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_UKIP_ECO$sd_mean_tokens <- sd(info_corpus_UKIP_ECO$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_UKIP_ECO, file = "UKIP_ECO_STAT.csv", row.names = FALSE)

# DESCRIPTIVE STATISTICS CORPUS UKIP IMMIGRATION POLICY
# Read the text files from the given location (replace the placeholder with
# the directory holding the UKIP immigration-policy texts).
file_txt_UKIP_IMM <- readtext("insert corresponding directory here")
# Build the corpus; the original analysis documents 298 .txt files here.
corpus_UKIP_IMM <- corpus(file_txt_UKIP_IMM)
# Per-document summary of the corpus. n is taken from the corpus itself
# instead of being hard-coded, so no document is silently left out when the
# number of files differs from the expected count.
info_corpus_UKIP_IMM <- summary(corpus_UKIP_IMM, n = ndoc(corpus_UKIP_IMM))
# Average text length in tokens (a single value repeated on every row).
info_corpus_UKIP_IMM$average_tokens <- mean(info_corpus_UKIP_IMM$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_UKIP_IMM$sd_mean_tokens <- sd(info_corpus_UKIP_IMM$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_UKIP_IMM, file = "UKIP_IMM_STAT.csv", row.names = FALSE)

# DESCRIPTIVE STATISTICS CORPUS UKIP ENVIRONMENTAL POLICY
# Read the text files from the given location (replace the placeholder with
# the directory holding the UKIP environmental-policy texts).
file_txt_UKIP_ENV <- readtext("insert corresponding directory here")
# Build the corpus.
# NOTE(review): the original comment stated 419 .txt files while the
# original summary() call used 244; presumably 244 is the correct count and
# 419 was copied from the economic-policy section -- TODO confirm.
corpus_UKIP_ENV <- corpus(file_txt_UKIP_ENV)
# Per-document summary of the corpus. n is taken from the corpus itself
# instead of being hard-coded, so every document is summarised whichever of
# the two counts above is the right one.
info_corpus_UKIP_ENV <- summary(corpus_UKIP_ENV, n = ndoc(corpus_UKIP_ENV))
# Average text length in tokens (a single value repeated on every row).
info_corpus_UKIP_ENV$average_tokens <- mean(info_corpus_UKIP_ENV$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_UKIP_ENV$sd_mean_tokens <- sd(info_corpus_UKIP_ENV$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_UKIP_ENV, file = "UKIP_ENV_STAT.csv", row.names = FALSE)

# DESCRIPTIVE STATISTICS CORPUS FSM ECONOMIC POLICY
# Load the required data (multiple .txt files); replace the placeholder with
# the directory holding the FSM economic-policy texts.
file_txt_FSM_ECO <- readtext("insert corresponding directory here")
# Build the corpus.
# NOTE(review): the original comment stated 320 .txt files while the
# original summary() call used 321 -- TODO confirm the actual count.
corpus_FSM_ECO <- corpus(file_txt_FSM_ECO)
# Dataset with the corpus' summary info (number of types, tokens and
# sentences per document). n is taken from the corpus itself instead of
# being hard-coded, so every document is summarised regardless of the count.
info_corpus_FSM_ECO <- summary(corpus_FSM_ECO, n = ndoc(corpus_FSM_ECO))
# Average text length in tokens (a single value repeated on every row).
info_corpus_FSM_ECO$average_tokens <- mean(info_corpus_FSM_ECO$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_FSM_ECO$sd_mean_tokens <- sd(info_corpus_FSM_ECO$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_FSM_ECO, file = "FSM_ECO_STAT.csv", row.names = FALSE)

# DESCRIPTIVE STATISTICS CORPUS FSM IMMIGRATION POLICY
# Load the required data (multiple .txt files); replace the placeholder with
# the directory holding the FSM immigration-policy texts.
file_txt_FSM_IMM <- readtext("insert corresponding directory here")
# Build the corpus; the original analysis documents 121 .txt files here.
corpus_FSM_IMM <- corpus(file_txt_FSM_IMM)
# Per-document summary of the corpus. n is taken from the corpus itself
# instead of being hard-coded, so no document is silently left out when the
# number of files differs from the expected count.
info_corpus_FSM_IMM <- summary(corpus_FSM_IMM, n = ndoc(corpus_FSM_IMM))
# Average text length in tokens (a single value repeated on every row).
info_corpus_FSM_IMM$average_tokens <- mean(info_corpus_FSM_IMM$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_FSM_IMM$sd_mean_tokens <- sd(info_corpus_FSM_IMM$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_FSM_IMM, file = "FSM_IMM_STAT.csv", row.names = FALSE)

# DESCRIPTIVE STATISTICS CORPUS FSM ENVIRONMENTAL POLICY
# Load the required data (multiple .txt files); replace the placeholder with
# the directory holding the FSM environmental-policy texts.
file_txt_FSM_ENV <- readtext("insert corresponding directory here")
# Build the corpus; the original analysis documents 229 .txt files here.
corpus_FSM_ENV <- corpus(file_txt_FSM_ENV)
# Per-document summary of the corpus. n is taken from the corpus itself
# instead of being hard-coded, so no document is silently left out when the
# number of files differs from the expected count.
info_corpus_FSM_ENV <- summary(corpus_FSM_ENV, n = ndoc(corpus_FSM_ENV))
# Average text length in tokens (a single value repeated on every row).
info_corpus_FSM_ENV$average_tokens <- mean(info_corpus_FSM_ENV$Tokens)
# SD of the text length in tokens (a single value repeated on every row).
info_corpus_FSM_ENV$sd_mean_tokens <- sd(info_corpus_FSM_ENV$Tokens)
# Export the per-document summary together with the two statistics.
write.csv(info_corpus_FSM_ENV, file = "FSM_ENV_STAT.csv", row.names = FALSE)

